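# Build the GDP "aspiration" variable for all available countries from World
# Bank (WDI) data, then merge in growth, unemployment, inflation, Polity and
# other democracy measures, and write the combined panel to CSV.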

library(WDI)
library(countrycode)
library(reshape)
library(foreign)

# Look up candidate World Bank indicator codes by name. The results are not
# stored anywhere; these calls are only informative when run interactively.
WDIsearch(string = "GDP", field = "name")
WDIsearch(string = "Employment", field = "name")

# Download the GDP series (indicator NY.GDP.MKTP.KD) for every country,
# 1950-2014. The original script describes it as GDP in constant 2000 dollars.
# NOTE(review): confirm the base year against the current WDI metadata.
dat <- WDI(indicator = "NY.GDP.MKTP.KD", country = "all", start = 1950, end = 2014)

# One data frame per country, each put in chronological order so that the
# position-based rolling calculation further down sees the years in sequence.
country <- as.factor(dat$country)
s.dat <- lapply(split(dat, country), function(country.rows) {
  country.rows[order(country.rows$year), ]
})

# Trailing rolling mean of a series.
#
# For position i the result is the mean of the `window` most recent values
# ending at (and including) position i. While fewer than `window` values are
# available (i < window) the mean is taken over everything seen so far, 1..i.
# Missing values are ignored; a window containing only NA yields NaN.
#
# NOTE(review): the window includes the current observation, although the
# script's comments speak of the "prior" ten values -- confirm this is intended.
#
# Args:
#   gdp:    numeric vector, already sorted in time order.
#   window: number of observations in the trailing window (default 10).
#
# Returns:
#   Numeric vector of the same length as `gdp` (length 0 for empty input).
yr.funk <- function(gdp, window = 10) {
  stopifnot(is.numeric(window), length(window) == 1, window >= 1)

  # seq_along() (rather than 1:length(gdp)) keeps empty input empty instead of
  # iterating over c(1, 0) and returning a spurious length-1 result.
  vapply(seq_along(gdp), function(i) {
    idx <- if (i < window) seq_len(i) else i:(i - window + 1)
    mean(gdp[idx], na.rm = TRUE)
  }, numeric(1))
}

# Reassemble the per-country pieces into a single data frame. unsplit() puts
# each country's (now year-sorted) rows back into the row positions that
# country occupies in the `country` factor.
dat <- unsplit(s.dat, country)
dat[["country"]] <- NULL

# Rolling GDP mean, computed per country and reassembled with the same factor
# so that it lines up row-for-row with `dat`.
yr10gdp <- unsplit(
  lapply(s.dat, function(country.rows) yr.funk(country.rows[["NY.GDP.MKTP.KD"]])),
  country
)
dat[["gdp10ave"]] <- yr10gdp

# Annual GDP growth (indicator NY.GDP.MKTP.KD.ZG, annual %), all countries,
# 1950-2014. The indicator column is renamed to `gdp.growth`.
gro <- WDI(country = "all", indicator = "NY.GDP.MKTP.KD.ZG", start = 1950, end = 2014)
names(gro)[names(gro) == "NY.GDP.MKTP.KD.ZG"] <- "gdp.growth"

# Bin growth at 20 evenly spaced quantile cut points (i.e. at most 19 bins).
# unique() drops tied cut points, which cut() would otherwise reject.
# include.lowest = TRUE closes the first interval on the left; without it the
# observation(s) equal to the minimum fall outside every bin and become NA.
growth.breaks <- unique(quantile(gro$gdp.growth,
                                 probs = seq(0, 1, length.out = 20),
                                 na.rm = TRUE))
gro$gdp.growth.bin <- cut(gro$gdp.growth, breaks = growth.breaks, include.lowest = TRUE)

# Aspiration gap: GDP relative to its rolling mean (`gdp10ave`), expressed as a
# proportion of that mean (not multiplied by 100).
dat <- rename(dat, c(NY.GDP.MKTP.KD = "GDP"))
dat$asp.gdp <- (dat$GDP - dat$gdp10ave) / dat$gdp10ave

# Quantile bins use 20 evenly spaced cut points (at most 19 bins); unique()
# removes tied cut points. include.lowest = TRUE keeps the minimum observation
# in the first bin -- without it that observation is assigned NA.
asp.breaks <- unique(quantile(dat$asp.gdp,
                              probs = seq(0, 1, length.out = 20),
                              na.rm = TRUE))
dat$asp.gdp.bin <- cut(dat$asp.gdp, breaks = asp.breaks, include.lowest = TRUE)

# Log GDP and its quantile bins, built the same way.
dat$lgdp <- log(dat$GDP)
lgdp.breaks <- unique(quantile(dat$lgdp,
                               probs = seq(0, 1, length.out = 20),
                               na.rm = TRUE))
dat$lgdp.bin <- cut(dat$lgdp, breaks = lgdp.breaks, include.lowest = TRUE)

# Two further World Bank series, keyed by indicator code and mapped to the
# column names used in the rest of the script:
#   SL.UEM.TOTL.ZS - "Unemployment, total (% of total labor force)"
#   SI.DST.03RD.20 - "Income share held by third 20%" (median-citizen income)
wb.labels <- c(SL.UEM.TOTL.ZS = "total_unemployment",
               SI.DST.03RD.20 = "income_share_third20")

unemp.inc <- WDI(indicator = names(wb.labels), country = "all", start = 1950, end = 2014)
unemp.inc <- rename(unemp.inc, wb.labels)

# The country name is dropped; the merge further down keys on iso2c and year.
unemp.inc[["country"]] <- NULL

# Merge the growth measures into the GDP data (inner join on iso2c/year).
# Columns are selected by name rather than by position, because the column
# order of WDI() output is not something this script controls.
dat <- merge(gro[, c("iso2c", "year", "gdp.growth", "gdp.growth.bin")], dat,
             by = c("iso2c", "year"))

# Left join: keep every country-year even when unemployment / income share
# data are missing.
dat <- merge(dat, unemp.inc, by = c("iso2c", "year"), all.x = TRUE)

# Translate ISO2 codes to Correlates of War numeric codes (cown), the key used
# by the remaining data sources.
iso2c.to.cown <- function(iso2c) {
  countrycode(iso2c, origin = "iso2c", destination = "cown")
}

# Rows whose ISO2 code has no COW equivalent are discarded.
dat$cown <- iso2c.to.cown(dat$iso2c)
dat <- subset(dat, !is.na(cown))

# Inflation: GDP deflator, annual % (indicator NY.GDP.DEFL.KD.ZG), reduced to
# COW code, year and the value, with unmatched codes removed.
inf <- WDI(indicator = "NY.GDP.DEFL.KD.ZG", country = "all", start = 1950, end = 2014)
inf$cown <- iso2c.to.cown(inf$iso2c)
inf <- inf[!is.na(inf$cown), c("cown", "year", "NY.GDP.DEFL.KD.ZG")]
inf <- rename(inf, c(NY.GDP.DEFL.KD.ZG = "wb.inflation"))

# Inner join: only country-years present in both tables survive.
dat <- merge(dat, inf, by = c("cown", "year"))

# Polity scores - both current and one-year lagged.
# The source file is read and cleaned once and reused for both merges.
p4 <- read.csv("/Volumes/TINY CRYPT/papers/Working Projects/Lazer Lab/LL Elections Project/Source Data/p4v2014.csv")

# Recode Montenegro to 341 so that it matches the `cown` key used in `dat`.
p4$ccode[which(p4$country == "Montenegro")] <- 341

# Remove Kosovo (per the original note: mislabeled as Montenegro in the source
# data). A logical mask is used instead of -grep(): with a negative index, an
# empty match would select zero rows and silently drop the whole table.
p4 <- p4[!grepl("Kosovo", p4$country), c("ccode", "year", "polity2")]

# Lagged score: shifting the year forward by one makes year t pick up the
# value observed in t - 1 when merged.
p4.lag <- p4
p4.lag$year <- p4.lag$year + 1
names(p4.lag)[names(p4.lag) == "polity2"] <- "l1polity2"
dat <- merge(dat, p4.lag, by.x = c("cown", "year"), by.y = c("ccode", "year"))

# Current score, then the year-on-year change. Both merges are inner joins, so
# country-years lacking either the current or the lagged record are dropped.
dat <- merge(dat, p4, by.x = c("cown", "year"), by.y = c("ccode", "year"))
dat$polity.diff <- dat$polity2 - dat$l1polity2

########################
# Additional democracy variables
########################
# Freedom House status, taken from the QoG standard time-series file.
# Only the country code, year and fh_status columns are kept.
qog <- read.csv("/Volumes/TINY CRYPT/papers/Working Projects/PCSE Diffusion/Source Data/qog_std_ts_20dec13.csv", sep=";")[, c("ccode", "year", "fh_status")]
names(qog) <- c("iso3n", "year", "democ_fh")

# The QoG country code is converted as an ISO numeric code to a COW code.
# Codes without a match (often temporary states, such as North and South
# Vietnam) produce a warning and are removed.
qog$cown <- countrycode(qog$iso3n, "iso3n", "cown", warn = TRUE)
qog <- subset(qog, !is.na(cown))

# Left join onto the main panel, keeping its existing row order.
# NOTE(review): if two ISO codes map to the same COW code in one year, this
# merge would duplicate rows in `dat` - worth checking.
dat <- merge(dat, qog, by = c("cown", "year"), all.x = TRUE, sort = FALSE)

# Cheibub, Gandhi, and Vreeland democracy indicator.
# `cowcode` is the COW numeric code; it is renamed to `cown` to match the
# merge key, and `democracy` becomes `democ_cgv`.
cgv <- read.csv("/Volumes/TINY CRYPT/papers/Working Projects/Pre-Fall 2012/dbab_project/dbab working files/Data/ddrevisited_data_v1.csv")[c("year", "cowcode", "democracy")]
names(cgv) <- c("year", "cown", "democ_cgv")
cgv <- subset(cgv, !is.na(cown))

# Left join onto the main panel, keeping its existing row order.
dat <- merge(dat, cgv, by = c("cown", "year"), all.x = TRUE, sort = FALSE)

# Region label derived from the COW code.
dat$region <- countrycode(dat$cown, origin = "cown", destination = "region")

# Write the final dataset.
# The output path is built with file.path() instead of calling setwd(), so the
# script no longer changes the session's working directory as a side effect.
# write.csv() defaults are kept (row names are written), so the file layout is
# the same as before.
out.dir <- "/Volumes/TINY CRYPT/papers/Working Projects/Lazer Lab/LL Elections Project/New Polling Data and Code/Data"
write.csv(dat, file.path(out.dir, "aspiration_and_polity_07212016.csv"))

